# -*- coding: utf-8 -*-
import scrapy
from daohang.items import DaohangItem


class SinaSpider(scrapy.Spider):
    """Spider for the Sina news navigation guide page.

    For every ``.section`` element under ``.article`` it walks the
    ``.clearfix`` blocks nested inside that section and yields one
    ``DaohangItem`` per block.
    """

    name = 'sina'
    allowed_domains = ['news.sina.com.cn']
    start_urls = ['http://news.sina.com.cn/guide/']  # URL the crawl starts from

    def parse(self, response):
        """Yield one ``DaohangItem`` per navigation block on the page.

        Each item carries:

        * ``title_h2`` -- list of text nodes of the section's ``.tit01``
          heading,
        * ``title_h3`` -- text of the block's ``.tit02`` sub-heading
          (``None`` when no text is found),
        * ``title_li`` -- list of link texts found under the block's ``li``
          elements.

        :param response: the downloaded guide page.
        """
        for section in response.css('.article .section'):
            heading = section.css('.tit01::text').extract()
            # Query relative to the current section. Searching the whole
            # response here would pair every section heading with every
            # navigation block on the page and emit duplicated,
            # mislabelled items.
            for block in section.css('.clearfix'):
                # Build a fresh item for every yield: re-yielding one shared
                # instance lets later iterations overwrite the fields of
                # items that pipelines/exporters may still be holding.
                entry = DaohangItem()
                entry['title_h2'] = list(heading)
                entry['title_h3'] = self._subtitle(block)
                entry['title_li'] = block.css('li a::text').extract()
                yield entry

    @staticmethod
    def _subtitle(block):
        """Return the sub-heading text of a navigation block.

        The text is taken from the first of these that matches inside the
        block: ``.tit02 a``, ``.tit02 span``, and finally ``.tit02`` itself.
        """
        for query in ('.tit02 a', '.tit02 span'):
            if block.css(query):
                return block.css(query + '::text').extract_first()
        return block.css('.tit02::text').extract_first()
